#include "opencl_source_map.hpp" 
namespace MNN { 
#ifndef MNN_OPENCL_BUFFER_CLOSED
const char* grid_sample_buf = 
"#ifdef MNN_SUPPORT_FP16\n"
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"#endif\n"
"#define GLOBAL_SIZE_3_DIMS "" __private const int global_size_dim0,__private const int global_size_dim1,__private const int global_size_dim2,\n"
"#define DEAL_NON_UNIFORM_DIM3(input1, input2, input3) "" if (input1 >= global_size_dim0 || input2 >= global_size_dim1 || input3 >= global_size_dim2) { "" return; "" }\n"
"enum BorderMode {\n"
" BorderMode_ZEROS=0,\n"
" BorderMode_CLAMP=1,\n"
" BorderMode_REFLECTION=2,\n"
" BorderMode_MIN=BorderMode_ZEROS,\n"
" BorderMode_MAX=BorderMode_REFLECTION\n"
"};\n"
"float getPosition(float x,int range,int alignCorners){\n"
" float a=alignCorners == 1? 1.0f : 0.0f;\n"
" float b=alignCorners == 1? 0.0f : 1.0f;\n"
" return ((1.0f+x)*(range-a)-b)/2.0f;\n"
"}\n"
"static int CLAMP(int v,int min,int max) {\n"
" if ((v)<min) {\n"
" (v)=min;\n"
" } else if ((v)>max) {\n"
" (v)=max;\n"
" }\n"
" return v;\n"
"}\n"
"COMPUTE_FLOAT4 sample(int h,int w,\n"
" const int offset_base,\n"
" __global const FLOAT *buffer,\n"
" int height,int width,\n"
" enum BorderMode paddingMode){\n"
" if (h<0 || h >= height || w<0 || w >= width) {\n"
" if(paddingMode == BorderMode_ZEROS)\n"
" {\n"
" return 0.0f;\n"
" }\n"
" // Clearly,CLAMP is the right way to go for GridSamplePaddingMode_BORDER\n"
" // For GridSamplePaddingMode_REFLECTION,since we have reflected the values into (-1,1),\n"
" // the leftover reflections degrade to GridSamplePaddingMode_BORDER\n"
" h=CLAMP(h,0,height-1);\n"
" w=CLAMP(w,0,width-1);\n"
" }\n"
" int offset=(offset_base+h)*width+w;\n"
" return CONVERT_COMPUTE_FLOAT4(vload4(offset,buffer));\n"
"}\n"
"COMPUTE_FLOAT4 sample3d(int d,int h,int w,\n"
" const int offset_base,\n"
" __global const FLOAT *buffer,\n"
" int depth,int height,int width,\n"
" enum BorderMode paddingMode){\n"
" if (d<0 || d >= depth || h<0 || h >= height || w<0 || w >= width) {\n"
" if(paddingMode == BorderMode_ZEROS)\n"
" {\n"
" return 0.0f;\n"
" }\n"
" d=CLAMP(d,0,depth-1);\n"
" h=CLAMP(h,0,height-1);\n"
" w=CLAMP(w,0,width-1);\n"
" }\n"
" int offset=((offset_base+d)*height+h)*width+w;\n"
" return CONVERT_COMPUTE_FLOAT4(vload4(offset,buffer));\n"
"}\n"
"__kernel void nearest_buf(GLOBAL_SIZE_3_DIMS __global const FLOAT* input,\n"
" __global const FLOAT* grid,\n"
" __global FLOAT* output,\n"
" __private const int input_height,\n"
" __private const int input_width,\n"
" __private const int output_height,\n"
" __private const int output_width,\n"
" __private const int batch,\n"
" __private const enum BorderMode paddingMode,\n"
" __private const int alignCorners){\n"
" \n"
" const int output_channel_block_idx=get_global_id(0);\n"
" const int output_width_block_idx=get_global_id(1);\n"
" const int output_batch_height_block_idx=get_global_id(2);\n"
" DEAL_NON_UNIFORM_DIM3(output_channel_block_idx,output_width_block_idx,output_batch_height_block_idx);\n"
" const int output_batch_idx=output_batch_height_block_idx/output_height;\n"
" const int output_height_idx=output_batch_height_block_idx % output_height;\n"
" // grid data format has been converted from nchw to nc4hw4\n"
" /* \n"
" (x1,x1,x1,x1) (y1,y2,y3,y4) \n"
" . . \n"
" . . slice\n"
" (x1,y1)...(xn,y1) . . \n"
" . . (xn,xn,xn,xn) (y1,y2,y3,y4)\n"
" . . <-> ---------------------------\n"
" . . (x1,x1,x1,x1) (y5,y6,y7,y8)\n"
" (x1,ym)...(xn,ym) . .\n"
" . . slice\n"
" . .\n"
" (xn,xn,xn,xn) (y5,y6,y7,y8)\n"
" ---------------------------\n"
" */\n"
" // output_width_block_idx means gird y offset,2 means grid width\n"
" const int grid_offset=(output_batch_idx*output_height+output_height_idx)*output_width+output_width_block_idx;\n"
" COMPUTE_FLOAT2 grid_xy=CONVERT_COMPUTE_FLOAT2(vload2(grid_offset,grid));\n"
" // get grid x,y\n"
" const float x=(float)grid_xy.x;\n"
" const float y=(float)grid_xy.y;\n"
" // convert grid x,y to input x,y coordinate range\n"
" float in_grid_x=getPosition(x,input_width,alignCorners);\n"
" float in_grid_y=getPosition(y,input_height,alignCorners);\n"
" // get nearest point\n"
" int nw=floor(in_grid_x+0.5f);\n"
" int nh=floor(in_grid_y+0.5f);\n"
" const int inp_offset_base=(output_batch_idx+output_channel_block_idx*batch)*input_height;\n"
" COMPUTE_FLOAT4 value=sample(nh,nw,inp_offset_base,input,input_height,input_width,paddingMode);\n"
" const int output_offset=((output_batch_idx+output_channel_block_idx*batch)*output_height+output_height_idx)*output_width+output_width_block_idx;\n"
" vstore4(CONVERT_FLOAT4(value),output_offset,output);\n"
"}\n"
"__kernel void bilinear_buf(GLOBAL_SIZE_3_DIMS __global const FLOAT* input,\n"
" __global const FLOAT* grid,\n"
" __global FLOAT* output,\n"
" __private const int input_height,\n"
" __private const int input_width,\n"
" __private const int output_height,\n"
" __private const int output_width,\n"
" __private const int batch,\n"
" __private const enum BorderMode paddingMode,\n"
" __private const int alignCorners){\n"
" const int output_channel_block_idx=get_global_id(0);\n"
" const int output_width_block_idx=get_global_id(1);\n"
" const int output_batch_height_block_idx=get_global_id(2);\n"
" DEAL_NON_UNIFORM_DIM3(output_channel_block_idx,output_width_block_idx,output_batch_height_block_idx);\n"
" const int output_batch_idx=output_batch_height_block_idx/output_height;\n"
" const int output_height_idx=output_batch_height_block_idx % output_height;\n"
" // output_width_block_idx means gird y offset,2 means grid width\n"
" const int grid_offset=(output_batch_idx*output_height+output_height_idx)*output_width+output_width_block_idx;\n"
" COMPUTE_FLOAT2 grid_xy=CONVERT_COMPUTE_FLOAT2(vload2(grid_offset,grid));\n"
" \n"
" // get grid x,y\n"
" const float x=(float)grid_xy.x;\n"
" const float y=(float)grid_xy.y;\n"
" // convert grid x,y to input x,y coordinate range\n"
" float in_grid_x=getPosition(x,input_width,alignCorners);\n"
" float in_grid_y=getPosition(y,input_height,alignCorners);\n"
" int in_h0=floor(in_grid_y);\n"
" int in_w0=floor(in_grid_x);\n"
" int in_h1=ceil(in_grid_y);\n"
" int in_w1=ceil(in_grid_x);\n"
" float x_weight=in_w1-in_grid_x;\n"
" float y_weight=in_h1-in_grid_y;\n"
" // bilinear interpolation\n"
" const int inp_offset_base=(output_batch_idx+output_channel_block_idx*batch)*input_height;\n"
" COMPUTE_FLOAT4 i00=sample(in_h0,in_w0,inp_offset_base,input,input_height,input_width,paddingMode);\n"
" COMPUTE_FLOAT4 i01=sample(in_h0,in_w1,inp_offset_base,input,input_height,input_width,paddingMode);\n"
" COMPUTE_FLOAT4 i10=sample(in_h1,in_w0,inp_offset_base,input,input_height,input_width,paddingMode);\n"
" COMPUTE_FLOAT4 i11=sample(in_h1,in_w1,inp_offset_base,input,input_height,input_width,paddingMode);\n"
" COMPUTE_FLOAT4 value=CONVERT_COMPUTE_FLOAT4(((COMPUTE_FLOAT4)x_weight*CONVERT_COMPUTE_FLOAT4(i00)+(COMPUTE_FLOAT4)(1.0f-x_weight)*CONVERT_COMPUTE_FLOAT4(i01))*(COMPUTE_FLOAT4)y_weight +\n"
" ((COMPUTE_FLOAT4)x_weight*CONVERT_COMPUTE_FLOAT4(i10)+(COMPUTE_FLOAT4)(1.0f-x_weight)*CONVERT_COMPUTE_FLOAT4(i11))*(COMPUTE_FLOAT4)(1.0f- y_weight));\n"
" \n"
" const int output_offset=((output_batch_idx+output_channel_block_idx*batch)*output_height+output_height_idx)*output_width+output_width_block_idx;\n"
" vstore4(CONVERT_FLOAT4(value),output_offset,output);\n"
"}\n"
"__kernel void nearest5d_buf(GLOBAL_SIZE_3_DIMS __global const FLOAT* input,\n"
" __global const FLOAT* grid,\n"
" __global FLOAT* output,\n"
" __private const int input_height,\n"
" __private const int input_width,\n"
" __private const int input_depth,\n"
" __private const int output_height,\n"
" __private const int output_width,\n"
" __private const int output_depth,\n"
" __private const int batch,\n"
" __private const enum BorderMode paddingMode,\n"
" __private const int alignCorners){\n"
" \n"
" const int output_channel_depth_idx=get_global_id(0);\n"
" const int output_width_block_idx=get_global_id(1);\n"
" const int output_batch_height_block_idx=get_global_id(2);\n"
" \n"
" DEAL_NON_UNIFORM_DIM3(output_channel_depth_idx,output_width_block_idx,output_batch_height_block_idx);\n"
" \n"
" const int output_channel_idx=output_channel_depth_idx/output_depth;\n"
" const int output_depth_idx=output_channel_depth_idx % output_depth;\n"
" const int output_batch_idx=output_batch_height_block_idx/output_height;\n"
" const int output_height_idx=output_batch_height_block_idx % output_height;\n"
" \n"
" const int grid_offset=((output_batch_idx*output_depth+output_depth_idx)*output_height+output_height_idx)*output_width+output_width_block_idx;\n"
" float3 grid_xyz=convert_float3(vload3(grid_offset,grid));\n"
" const float x=grid_xyz.x;\n"
" const float y=grid_xyz.y;\n"
" const float z=grid_xyz.z;\n"
" float in_grid_x=getPosition(x,input_width,alignCorners);\n"
" float in_grid_y=getPosition(y,input_height,alignCorners);\n"
" float in_grid_z=getPosition(z,input_depth,alignCorners);\n"
" // get nearest point\n"
" int nw=floor(in_grid_x+0.5f);\n"
" int nh=floor(in_grid_y+0.5f);\n"
" int nd=floor(in_grid_z+0.5f);\n"
" const int inp_offset_base=(output_batch_idx+output_channel_idx*batch)*input_depth;\n"
" COMPUTE_FLOAT4 value=sample3d(nd,nh,nw,inp_offset_base,input,input_depth,input_height,input_width,paddingMode);\n"
" const int output_offset=(((output_batch_idx+output_channel_idx*batch)*output_depth+output_depth_idx)*output_height+output_height_idx)*output_width+output_width_block_idx;\n"
" vstore4(CONVERT_FLOAT4(value),output_offset,output);\n"
"}\n"
"__kernel void bilinear5d_buf(GLOBAL_SIZE_3_DIMS\n"
" __global const FLOAT* input,\n"
" __global const FLOAT* grid,\n"
" __global FLOAT* output,\n"
" __private const int input_height,\n"
" __private const int input_width,\n"
" __private const int input_depth,\n"
" __private const int output_height,\n"
" __private const int output_width,\n"
" __private const int output_depth,\n"
" __private const int batch,\n"
" __private const enum BorderMode paddingMode,\n"
" __private const int alignCorners){\n"
" const int output_channel_depth_idx=get_global_id(0);\n"
" const int output_width_block_idx=get_global_id(1);\n"
" const int output_batch_height_block_idx=get_global_id(2);\n"
" DEAL_NON_UNIFORM_DIM3(output_channel_depth_idx,output_width_block_idx,output_batch_height_block_idx);\n"
" \n"
" const int output_channel_idx=output_channel_depth_idx/output_depth;\n"
" const int output_depth_idx=output_channel_depth_idx % output_depth;\n"
" const int output_batch_idx=output_batch_height_block_idx/output_height;\n"
" const int output_height_idx=output_batch_height_block_idx % output_height;\n"
" \n"
" const int grid_offset=((output_batch_idx*output_depth+output_depth_idx)*output_height+output_height_idx)*output_width+output_width_block_idx;\n"
" float3 grid_xyz=convert_float3(vload3(grid_offset,grid));\n"
" \n"
" // get grid x,y\n"
" const float x=grid_xyz.x;\n"
" const float y=grid_xyz.y;\n"
" const float z=grid_xyz.z;\n"
" float in_grid_x=getPosition(x,input_width,alignCorners);\n"
" float in_grid_y=getPosition(y,input_height,alignCorners);\n"
" float in_grid_z=getPosition(z,input_depth,alignCorners);\n"
" int in_d0=floor(in_grid_z);\n"
" int in_h0=floor(in_grid_y);\n"
" int in_w0=floor(in_grid_x);\n"
" int in_d1=ceil(in_grid_z);\n"
" int in_h1=ceil(in_grid_y);\n"
" int in_w1=ceil(in_grid_x);\n"
" \n"
" float x_weight0=in_grid_x-in_w0;\n"
" float x_weight1=1-x_weight0;\n"
" float y_weight0=in_grid_y-in_h0;\n"
" float y_weight1=1-y_weight0;\n"
" float z_weight0=in_grid_z-in_d0;\n"
" float z_weight1=1-z_weight0;\n"
" // bilinear interpolation\n"
" const int inp_offset_base=(output_batch_idx+output_channel_idx*batch)*input_depth;\n"
" COMPUTE_FLOAT4 i000=sample3d(in_d0,in_h0,in_w0,inp_offset_base,input,input_depth,input_height,input_width,paddingMode);\n"
" COMPUTE_FLOAT4 i001=sample3d(in_d0,in_h0,in_w1,inp_offset_base,input,input_depth,input_height,input_width,paddingMode);\n"
" COMPUTE_FLOAT4 i010=sample3d(in_d0,in_h1,in_w0,inp_offset_base,input,input_depth,input_height,input_width,paddingMode);\n"
" COMPUTE_FLOAT4 i011=sample3d(in_d0,in_h1,in_w1,inp_offset_base,input,input_depth,input_height,input_width,paddingMode);\n"
" COMPUTE_FLOAT4 i100=sample3d(in_d1,in_h0,in_w0,inp_offset_base,input,input_depth,input_height,input_width,paddingMode);\n"
" COMPUTE_FLOAT4 i101=sample3d(in_d1,in_h0,in_w1,inp_offset_base,input,input_depth,input_height,input_width,paddingMode);\n"
" COMPUTE_FLOAT4 i110=sample3d(in_d1,in_h1,in_w0,inp_offset_base,input,input_depth,input_height,input_width,paddingMode);\n"
" COMPUTE_FLOAT4 i111=sample3d(in_d1,in_h1,in_w1,inp_offset_base,input,input_depth,input_height,input_width,paddingMode);\n"
" \n"
" \n"
" COMPUTE_FLOAT4 i00=(COMPUTE_FLOAT4)(x_weight1)*i000+(COMPUTE_FLOAT4)(x_weight0)*i001;\n"
" COMPUTE_FLOAT4 i01=(COMPUTE_FLOAT4)(x_weight1)*i010+(COMPUTE_FLOAT4)(x_weight0)*i011;\n"
" COMPUTE_FLOAT4 i10=(COMPUTE_FLOAT4)(x_weight1)*i100+(COMPUTE_FLOAT4)(x_weight0)*i101;\n"
" COMPUTE_FLOAT4 i11=(COMPUTE_FLOAT4)(x_weight1)*i110+(COMPUTE_FLOAT4)(x_weight0)*i111;\n"
" \n"
" COMPUTE_FLOAT4 i0=(COMPUTE_FLOAT4)(y_weight1)*i00+(COMPUTE_FLOAT4)(y_weight0)*i01;\n"
" COMPUTE_FLOAT4 i1=(COMPUTE_FLOAT4)(y_weight1)*i10+(COMPUTE_FLOAT4)(y_weight0)*i11;\n"
" COMPUTE_FLOAT4 interp=(COMPUTE_FLOAT4)(z_weight1)*i0+(COMPUTE_FLOAT4)(z_weight0)*i1;\n"
" \n"
" const int output_offset=(((output_batch_idx+output_channel_idx*batch)*output_depth+output_depth_idx)*output_height+output_height_idx)*output_width+output_width_block_idx;\n"
" vstore4(CONVERT_FLOAT4(interp),output_offset,output);\n"
"}\n"
;
#endif
}
